Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.
2 Problem 1 - OOP Programming
2.1 a. For the rational class, define the following:
# Load the necessary librarieslibrary(Rcpp)library(methods)# Use Rcpp to define the GCD and LCM functionscppFunction('#include <numeric>int gcd(int a, int b) { return std::gcd(a, b);}')cppFunction('#include <numeric>int lcm(int a, int b) { return std::lcm(a, b);}')# Set the rational classsetClass("rational",slots =list(numerator ="integer", denominator ="integer"),prototype =list(numerator =integer(0), denominator =integer(1)))#' rational number constructor#' #' @description Creates an instance of the rational class.#' @param numerator Integer, the numerator of the rational number.#' @param denominator Integer, the denominator of the rational number. Must not be zero.#' @return An object of class 'rational'.#' @examples#' r1 <- rational(1, 2)#' r2 <- rational(3, 4)rational <-function(numerator =0, denominator =1) {if (!is.numeric(numerator) ||!is.numeric(denominator)) {stop("Both numerator and denominator must be numeric.") }if (numerator !=as.integer(numerator) | denominator !=as.integer(denominator)) {stop('Both numerator and denominator can not be floating point') }if (denominator ==0) {stop("The denominator can not be zero.") }new("rational", numerator =as.integer(numerator), denominator =as.integer(denominator))}# Validator to ensure the denominator is not zerosetValidity("rational", function(object) {if (object@denominator ==0L) return("The denominator cannot be zero")TRUE})
# Method to display the rational numbersetMethod("show", "rational", function(object) {cat(object@numerator, "/", object@denominator, "\n")})# Generic and method to simplify and quotient rational numberssetGeneric("simplify", function(object) standardGeneric("simplify"))
#' Simplify a rational number#' #' @description Reduces the rational number to its simplest form using the greatest common divisor.#' @param object A 'rational' object.#' @return A simplified 'rational' object.setMethod("simplify", "rational", function(object) { g <-gcd(object@numerator, object@denominator) new_numerator <- object@numerator / g new_denominator <- object@denominator / gif (new_numerator <0& new_denominator <0) { new_numerator <-abs(new_numerator) new_denominator <-abs(new_denominator) }return(rational(new_numerator, new_denominator))})#' Calculate the decimal quotient of a rational number#' #' @description Computes the decimal value of the rational number and prints it formatted to a specified number of decimal places.#' @param object A 'rational' object.#' @param digits Numerical, number of decimal places for formatting the output.#' @return The quotient as a numeric value, printed but not returned.setMethod("quotient", "rational", function(object, digits =0) {if (digits <0|!is.numeric(digits)) {stop('Digit Must be greater than or equal to 0.') } quotient <- object@numerator / object@denominatorprint(formatC(quotient, format ="f", digits = digits))return(invisible(quotient))})# Methods for arithmetic operationssetMethod('+', signature(e1 ='rational', e2 ='rational'), function(e1, e2) { denom <-lcm(e1@denominator, e2@denominator) numer <- (e1@numerator * (denom / e1@denominator)) + (e2@numerator * (denom / e2@denominator))return(simplify(rational(numer, denom)))})setMethod('-', signature(e1 ='rational', e2 ='rational'), function(e1, e2) { denom <-lcm(e1@denominator, e2@denominator) numer <- (e1@numerator * (denom / e1@denominator)) - (e2@numerator * (denom / e2@denominator))return(simplify(rational(numer, denom)))})setMethod('*', signature(e1 ='rational', e2 ='rational'), function(e1, e2) { numer <- e1@numerator * e2@numerator denom <- e1@denominator * e2@denominatorreturn(simplify(rational(numer, denom)))})setMethod('/', signature(e1 ='rational', e2 ='rational'), function(e1, e2) { numer <- e1@numerator * e2@denominator denom <- e1@denominator * e2@numeratorif (denom ==0L) stop('division by zero')return(simplify(rational(numer, denom)))})
2.2 b. Use your rational class to create three objects:
<simpleError in rational("one", "two"): Both numerator and denominator must be numeric.>
3 2. Problem 2 - plotly
3.1 a. Does the distribution of genre of sales across years appear to change?
3.1.1 Code
# Load the librarieslibrary(dplyr)
Attaching package: 'dplyr'
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(plotly)
Loading required package: ggplot2
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Load the dataart <-read.csv("C:\\Users\\Feixing\\Desktop\\stats 506\\data\\Problem Set 4\\df_for_ml_improved_new_market.csv")# Correct overlapping dataart$Genre___Others[art$Genre___Painting ==1] <-0# Create a single categorical variableart$genre <-ifelse(art$Genre___Painting ==1, "Painting",ifelse(art$Genre___Photography ==1, "Photography",ifelse(art$Genre___Print ==1, "Print",ifelse(art$Genre___Sculpture ==1, "Sculpture",ifelse(art$Genre___Others ==1, "Other", NA)))))# Data grouping and scale calculationart_genre_year <- art %>%group_by(year, genre) %>%summarise(count =n(), .groups ='drop') %>%group_by(year) %>%mutate(total =sum(count)) %>%ungroup() %>%mutate(frequency = count / total)# Plot the stack areap <-plot_ly(data = art_genre_year, x =~year, y =~frequency, type ='scatter', mode ='lines',stackgroup ='one', color =~genre) %>%layout(title ="Distribution of Art Sales Genre Across Years",xaxis =list(title ="Year"),yaxis =list(title ="Proportion", tickformat =",.0%"))# Output the plotp
3.1.2 Conclusion:
Painting: Initially, painting occupied a significant portion of the market in the early 2000s, but its share declined over time, particularly after 2005. This may indicate the rise of other art genres or changes in consumer preferences.
Photography: The proportion of photography was relatively low before 2000, but began to increase steadily starting in 2000, especially from 2005 to 2010. This significant increase may reflect the widespread adoption of digital photography technology and growing market interest in photographic art.
Sculpture: The proportion of sculpture sales remained relatively stable throughout the period, indicating a balanced market demand.
Print: The proportion of prints has risen since the 2000s, particularly after 2010, which may be associated with the growing popularity of prints as collectibles and decorative items.
Other: This category maintained a small proportion throughout the period, indicating it may include niche or specialized art genres.
3.2 b. Generate an interactive plot with plotly that can address both of these questions
3.2.1 Code
library(plotly)library(dplyr)# Load the dataart_sales <-read.csv("C:\\Users\\Feixing\\Desktop\\stats 506\\data\\Problem Set 4\\df_for_ml_improved_new_market.csv")art_sales$Genre___Others[art_sales$Genre___Painting ==1] <-0art_sales <- art_sales %>%mutate(genre =case_when( Genre___Painting ==1~"Painting", Genre___Photography ==1~"Photography", Genre___Print ==1~"Print", Genre___Sculpture ==1~"Sculpture", Genre___Others ==1~"Other",TRUE~NA_character_ ))# Calculate the average price of each art type for each yearyearly_prices <- art_sales %>%group_by(year, genre) %>%summarise(mean_price =mean(price_usd, na.rm =TRUE), .groups ='drop')# Calculate the average price for all types for each yearyearly_avg_prices <- art_sales %>%group_by(year) %>%summarise(yearly_avg_price =mean(price_usd, na.rm =TRUE), .groups ='drop')# Create interactive chartsplot <-plot_ly() %>%add_trace(data = yearly_prices, x =~year, y =~mean_price, type ='scatter', mode ='lines+markers', color =~genre, colors = RColorBrewer::brewer.pal(5, "Set1"), hoverinfo ='text', text =~paste('Year:', year, '<br>Mean Price: $', round(mean_price, 2))) %>%add_trace(data = yearly_avg_prices, x =~year, y =~yearly_avg_price, type ='scatter', mode ='lines', line =list(color ='black', width =2), hoverinfo ='text',text =~paste('Yearly Average: $', round(yearly_avg_price, 2)),name ='Yearly Average') %>%layout(title ="Change in Sales Price Over Time by Art Genre",xaxis =list(title ="Year"),yaxis =list(title ="Average Sales Price (USD)"),hovermode ='closest')# Output the resultplot
3.2.2 Conclusion:
Changes in Sales Prices Over Time:
Overall, the sales prices across various art genres show a trend of volatile growth. Particularly from 2000 to 2008, most genres saw significant price increases. Despite some subsequent declines, the general trend remained upward.
How Art Genres Influence Changes in Sales Prices:
Painting: Prices were relatively stable until 2005, then rose significantly, especially peaking in 2008 before declining thereafter.
Photography: Prices gradually increased starting in 2000, peaking between 2007 and 2008, and then slightly declining. The price fluctuations in photography may be related to advancements in technology and market acceptance.
Print: Post-2005, prices began to rise significantly, peaking in 2008 and then declining. The increased market demand for prints may have contributed to the rise in prices.
Sculpture: Exhibited minor price fluctuations with a generally stable trend, though there was an increase in 2008.
Other: Price fluctuations were minor and relatively stable, without significant long-term increases or decreases.
4 Problem 3 - data.table
4.1 a. Generate a table (which can just be a nicely printed tibble) reporting the mean and median departure delay per airport. Generate a second table (which again can be a nicely printed tibble) reporting the mean and median arrival delay per airport. Exclude any destination with under 10 flights.
4.1.1 Code
# Load the necessary librarieslibrary(data.table)
Attaching package: 'data.table'
The following objects are masked from 'package:dplyr':
between, first, last
library(nycflights13)# Load the data.tableflights_dt <-as.data.table(flights)airports_dt <-as.data.table(airports)# Departure datadep_delay_stats <- flights_dt[ , .(mean_dep_delay =mean(dep_delay, na.rm =TRUE),median_dep_delay =median(dep_delay, na.rm =TRUE),num_flights = .N), by = origin][num_flights >=10][order(-mean_dep_delay)]# Convert the airport code to the airport namedep_delay_stats[, origin := airports_dt[.SD, on = .(faa = origin), x.name]]# Arrival dataarr_delay_stats <- flights_dt[ , .(mean_arr_delay =mean(arr_delay, na.rm =TRUE),median_arr_delay =median(arr_delay, na.rm =TRUE),num_flights = .N), by = dest][num_flights >=10][order(-mean_arr_delay)]# Convert the airport code to the airport namearr_delay_stats[, dest := airports_dt[.SD, on = .(faa = dest), x.name]]# Print resultsprint(dep_delay_stats[, .(origin, mean_dep_delay, median_dep_delay)])
origin mean_dep_delay median_dep_delay
<char> <num> <num>
1: Newark Liberty Intl 15.10795 -1
2: John F Kennedy Intl 12.11216 -1
3: La Guardia 10.34688 -3
dest mean_arr_delay median_arr_delay
<char> <num> <num>
1: Columbia Metropolitan 41.764151 28.0
2: Tulsa Intl 33.659864 14.0
3: Will Rogers World 30.619048 16.0
4: Jackson Hole Airport 28.095238 15.0
5: Mc Ghee Tyson 24.069204 2.0
---
98: Seattle Tacoma Intl -1.099099 -11.0
99: Honolulu Intl -1.365193 -7.0
100: <NA> -3.835907 -9.0
101: John Wayne Arpt Orange Co -7.868227 -11.0
102: Palm Springs Intl -12.722222 -13.5
4.2 b. How many flights did the aircraft model with the fastest average speed take?
# Load the librarieslibrary(data.table)library(nycflights13)# Load the data.tableflights_dt <-data.table(flights)planes_dt <-data.table(planes)# Calculate the speedspeed_flights <- flights_dt[, .(distance, air_time), by = .(tailnum)]speed_flights[, speed := distance / air_time *60]model_speeds <-merge(speed_flights, planes_dt[, .(tailnum, model)], by ="tailnum")avg_speed_flights <- model_speeds[, .(average_speed =mean(speed, na.rm =TRUE),num_flights = .N), by = model]# Find the fastest average speed modelfastest_model <- avg_speed_flights[which.max(average_speed)]fastest_model_df <-data.table(model = fastest_model$model, average_speed = fastest_model$average_speed, num_flights = fastest_model$num_flights)# Output the dataprint(fastest_model_df)
model average_speed num_flights
<char> <num> <int>
1: 777-222 482.6254 4
5 References and Data Sources
The datasets used in this project are obtained from the following sources:
nycflights13 Data: Provides airline on-time data for all flights departing NYC in 2013. This comprehensive dataset includes metadata on airlines, airports, weather, and planes. Available from the Comprehensive R Archive Network (CRAN) at https://CRAN.R-project.org/package=nycflights13. Maintained by Hadley Wickham.
df_for_ml_improved_new_market: This dataset enumerates characteristics of art sales, including the sale prices and details about the artworks sold. It is intended for creating publication-ready plots that elucidate market trends and valuations in art sales. More information and dataset download are available at Repository Link.
6 Methods and Implementation
All data analyses were performed in the R environment, employing a range of techniques including data import, data cleaning, statistical analysis, and result visualization.
7 Code and Documentation Repository
This document and related code are hosted on GitHub for review and sharing purposes. Access link: GitHub Repository Link
8 Acknowledgements
Thanks to the course instructors and teaching assistants for their guidance on this assignment. Thanks to all data providers for supporting open data.
9 Notes
The analyses in this document are for academic purposes only, intended to fulfill the requirements of a statistics course. While every reasonable effort has been made to ensure the accuracy of the analysis results, the content of this document represents only the views of the author.